# Example set-up: simulate a continuous data set, derive a target from three
# of its columns, then plant noisy near-copies ("equivalences") of two of them.
set.seed(123)  # fix the RNG so the simulated data are reproducible
# require(gRbase)  # for faster computations in the internal functions

# library() stops with an error when 'hash' is not installed; require() would
# only return FALSE and let the rest of the script run without the package.
library(hash)

# Dimensions of the simulated data set.
n_obs <- 1000
n_vars <- 1000

# Simulate a data set with continuous data, uniform on [1, 100].
dataset <- matrix(runif(n_obs * n_vars, min = 1, max = 100), ncol = n_vars)

# Define a simulated class variable: a linear signal in columns 10, 200 and 20
# plus Gaussian noise.
target <- 3 * dataset[, 10] + 2 * dataset[, 200] + 3 * dataset[, 20] +
  rnorm(n_obs, mean = 0, sd = 5)

# Define some simulated equivalences. Statement order matters here: column 15
# is built from column 10 BEFORE column 10 itself is perturbed, and the order
# of the rnorm() calls fixes which random draws each column receives.
dataset[, 15] <- dataset[, 10] + rnorm(n_obs, mean = 0, sd = 2)
dataset[, 10] <- dataset[, 10] + rnorm(n_obs, mean = 0, sd = 2)
dataset[, 250] <- dataset[, 200] + rnorm(n_obs, mean = 0, sd = 2)
dataset[, 230] <- dataset[, 200] + rnorm(n_obs, mean = 0, sd = 2)
# Run the SES algorithm on the simulated data with hashing switched on.
# NOTE(review): SES() is not defined in this script; it is presumably provided
# by a package that must already be attached -- confirm before running.
sesObject <- SES(
  target, dataset,
  max_k = 5, threshold = 0.05, test = "testIndFisher",
  hash = TRUE, hashObject = NULL
)

# Print a summary of the SES output.
summary(sesObject)

# Plot the SES output.
plot(sesObject, mode = "all")

# Queues holding the equivalences for each selected variable.
sesObject@queues

# Generated signatures.
sesObject@signatures

# Run time of the SES call.
sesObject@runtime

# Optional follow-up (left disabled): re-run SES with the same or a different
# configuration, reusing the hash object from the first run so previously
# stored statistics can be retrieved. This is only valid on the SAME dataset
# (!important).
# hashObj <- sesObject@hashObject
# sesObject2 <- SES(target, dataset, max_k = 2, threshold = 0.01,
#                   test = "testIndFisher", hash = TRUE, hashObject = hashObj)
# Retrieve the results as before: summary, plot, sesObject2@...
# summary(sesObject2)
# Run time of the second call.
# sesObject2@runtime

# Optional (left disabled): the same kind of call using MMPC, without hashing.
# MMPCObject <- MMPC(target, dataset, max_k = 3, threshold = 0.05,
#                    test = "testIndFisher", hash = FALSE, hashObject = NULL)
# MMPCObject@selectedVars
# MMPCObject@runtime
# Run the code above in your browser using DataLab